Objectives: The goal of this kernel is to perform sentiment analysis on the dialogue of the main characters in Star Wars. The visualizations are made interactive with plotly and highcharter. I hope the kernel is easy to read and enjoyable!
If you like the kernel, please give it an upvote. Thanks!
# Install pacman on first use, then let it install and attach every other
# package. requireNamespace() only checks availability; pacman is always
# called through `pacman::`, so it does not need to be attached.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load(
  tidyverse, tm, plotly, highcharter, viridis,
  wordcloud, wordcloud2, plotrix, tidytext,
  reshape2, ggthemes, qdap
)

# Read the dialogue table of each episode and stack them into one data frame.
# stringsAsFactors = FALSE keeps the text columns as character on R < 4.0 too,
# so bind_rows() does not have to reconcile differing factor levels.
ep4 <- read.table("input/SW_EpisodeIV.txt", stringsAsFactors = FALSE)
ep5 <- read.table("input/SW_EpisodeV.txt", stringsAsFactors = FALSE)
ep6 <- read.table("input/SW_EpisodeVI.txt", stringsAsFactors = FALSE)
combined <- bind_rows(ep4, ep5, ep6)
rm(ep4, ep5, ep6)

# clean corpus
# Normalise a tm corpus before term counting.
#
# Steps, applied in this order: remove punctuation, collapse whitespace,
# lower-case, remove stop words, remove numbers.
#
# corpus: a tm corpus (as built by Corpus(VectorSource(...))).
# Returns the transformed corpus.
cleanCorpus <- function(corpus) {
  # Extra stop words are spelled without apostrophes because punctuation has
  # already been stripped by the time removeWords runs.
  extra_stopwords <- c("thats","weve","hes","theres","ive","im",
                       "will","can","cant","dont","youve","us",
                       "youre","youll","theyre","whats","didnt")
  drop_words <- c(stopwords("en"), extra_stopwords)

  corpus %>%
    tm_map(removePunctuation) %>%
    tm_map(stripWhitespace) %>%
    tm_map(content_transformer(tolower)) %>%
    tm_map(removeWords, drop_words) %>%
    tm_map(removeNumbers)
}
# frequent terms
#
# Build a word-frequency table from a character vector of text.
#
# text: vector of documents (one element per document).
# Returns a data frame with columns `word` and `freq`, sorted by decreasing
# frequency. Terms absent from more than 99.9% of documents are dropped.
frequentTerms <- function(text) {
  term_matrix <- text %>%
    VectorSource() %>%
    Corpus() %>%
    cleanCorpus() %>%
    TermDocumentMatrix() %>%
    removeSparseTerms(0.999) %>%
    as.matrix()

  # Row sums give each term's total count across all documents.
  counts <- sort(rowSums(term_matrix), decreasing = TRUE)
  data.frame(word = names(counts), freq = counts)
}
# clean by each character
#
# Build a term-by-character count matrix for the most talkative characters.
#
# All dialogue of each selected character is concatenated into one document,
# cleaned with cleanCorpus(), and turned into a term-document matrix whose
# columns are named after the characters.
#
# dataset: data frame with `character` and `dialogue` columns.
# n_chars: how many characters to take from the top of the global `top_chars`
#          table (default 10, the value that used to be hard-coded). If
#          `top_chars` has fewer rows, only the available ones are used
#          instead of producing NA names and "NA" pseudo-dialogue.
#
# Side effect: the full matrix is stored as `all_clean` in the global
# environment, because the plotting code further down reads it from there.
# Note that `top_chars` is looked up when the function is called, so it must
# be defined before the first call.
#
# Returns the first six rows of the matrix (for a quick look).
clean_top_char <- function(dataset, n_chars = 10) {
  stopifnot(is.numeric(n_chars), length(n_chars) == 1, n_chars >= 1)

  # as.character() guards against `character` being a factor column.
  char_names <- as.character(head(top_chars$character, n_chars))

  # One long string per character; vapply avoids growing a list in a loop.
  all_dialogue <- vapply(
    char_names,
    function(name) {
      paste(dataset$dialogue[dataset$character == name], collapse = " ")
    },
    character(1),
    USE.NAMES = FALSE
  )

  all_clean <- all_dialogue %>%
    VectorSource() %>%
    Corpus() %>%
    cleanCorpus() %>%
    TermDocumentMatrix() %>%
    as.matrix()
  colnames(all_clean) <- char_names

  assign("all_clean", all_clean, envir = .GlobalEnv)
  head(all_clean)
}

# The 20 characters with the most lines of dialogue.
top_chars <- combined %>%
  count(character) %>%
  arrange(desc(n)) %>%
  head(20)
# Treemap of the top characters; tile size and colour both follow the line
# count `n`.
hchart(top_chars, type = "treemap", hcaes(x = "character", value = "n", color = "n"))

# Bar chart of the 30 most frequent words across all dialogue.
combined$dialogue %>%
  frequentTerms() %>%
  # dim()
  head(30) %>%
  mutate(word = factor(word)) %>%
  plot_ly(x = ~reorder(word, -freq), y = ~freq, colors = viridis(10)) %>%
  add_bars(color = ~word) %>%
  layout(
    title = "Top 30 Words",
    yaxis = list(title = " "),
    xaxis = list(title = ""),
    margin = list(l = 100)
  )

# Build the per-character term matrix. The call prints the head of the matrix
# and also stores the full matrix globally as `all_clean`.
clean_top_char(combined)
## Docs
## Terms LUKE HAN THREEPIO LEIA VADER BEN LANDO YODA EMPEROR
## aaargh 1 0 0 0 0 0 0 0 0
## academy 3 0 0 0 0 0 0 0 0
## accelerator 1 0 0 0 0 0 0 0 0
## accepted 1 0 0 0 2 0 0 0 0
## across 1 0 1 1 0 0 0 0 0
## action 1 0 0 0 0 0 0 0 0
## Docs
## Terms RED LEADER
## aaargh 0
## academy 0
## accelerator 0
## accepted 0
## across 1
## action 0
# Word cloud of the terms LUKE and THREEPIO have in common.
commonality.cloud(
  all_clean[, c("LUKE", "THREEPIO")],
  colors = "steelblue1", at.least = 2, max.words = 100
)

# Word cloud contrasting the terms of LUKE and THREEPIO.
comparison.cloud(
  all_clean[, c("LUKE", "THREEPIO")],
  colors = c("#F8766D", "#00BFC4"), max.words = 50
)

# Terms used by both characters, ordered by how unevenly they use them.
common_words <- all_clean %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  filter(LUKE > 0, THREEPIO > 0) %>%
  # select(LUKE, THREEPIO) %>%
  mutate(difference = abs(LUKE - THREEPIO)) %>%
  arrange(desc(difference))

common_words_25 <- common_words %>%
  head(25)

# Create the pyramid plot
pyramid.plot(
  common_words_25$LUKE, common_words_25$THREEPIO,
  labels = common_words_25$rowname, gap = 8,
  top.labels = c("LUKE", "Words", "THREEPIO"),
  main = "Words in Common", laxlab = NULL,
  raxlab = NULL, unit = NULL
)
## [1] 5.1 4.1 4.1 2.1
# Donut chart of one character's word counts per Loughran sentiment category.
#
# character_name: column of `term_matrix` to summarise, e.g. "LUKE".
# term_matrix:    term-by-character count matrix (defaults to the global
#                 `all_clean` created by clean_top_char()).
# Returns the plotly object; at top level it is printed, i.e. rendered.
plot_emotion_donut <- function(character_name, term_matrix = all_clean) {
  # A pie trace has no meaningful axes, so hide grid, zero line and ticks.
  hidden_axis <- list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)

  term_matrix %>%
    as.data.frame() %>%
    rownames_to_column(var = "word") %>%
    inner_join(get_sentiments("loughran"), by = "word") %>%
    group_by(sentiment) %>%
    summarise(number = sum(.data[[character_name]])) %>%
    plot_ly(labels = ~sentiment, values = ~number) %>%
    add_pie(hole = 0.6) %>%
    layout(
      title = paste(character_name, "Emotions"), showlegend = TRUE,
      xaxis = hidden_axis, yaxis = hidden_axis
    )
}

plot_emotion_donut("LUKE")

plot_emotion_donut("THREEPIO")

# Comparison cloud of LUKE's words, grouped by Loughran sentiment.
all_clean %>%
  as.data.frame() %>%
  rownames_to_column(var = "word") %>%
  inner_join(get_sentiments("loughran"), by = "word") %>%
  select(word, LUKE, sentiment) %>%
  filter(LUKE != 0) %>%
  spread(sentiment, LUKE, fill = 0) %>%
  column_to_rownames(var = "word") %>%
  comparison.cloud(colors = c("#F8766D", "#00BFC4", "firebrick", "steelblue"), max.words = 50)

# Comparison cloud of VADER's words, split into bing positive/negative.
# Words VADER never says are dropped first, as in the LUKE cloud above, so
# zero-count rows cannot end up in the plot.
all_clean %>%
  as.data.frame() %>%
  rownames_to_column(var = "word") %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  select(word, VADER, sentiment) %>%
  filter(VADER != 0) %>%
  spread(sentiment, VADER, fill = 0) %>%
  column_to_rownames(var = "word") %>%
  comparison.cloud(colors = c("#F8766D", "#00BFC4"), max.words = 50)

# Word counts per NRC sentiment for LUKE and THREEPIO, side by side.
senti_LUKE_THREE <- all_clean %>%
  as.data.frame() %>%
  rownames_to_column(var = "word") %>%
  inner_join(get_sentiments("nrc"), by = "word") %>%
  select(LUKE, THREEPIO, sentiment) %>%
  group_by(sentiment) %>%
  summarise(
    sum_luke = sum(LUKE),
    sum_threepio = sum(THREEPIO)
  )

pyramid.plot(
  senti_LUKE_THREE$sum_luke, senti_LUKE_THREE$sum_threepio,
  labels = senti_LUKE_THREE$sentiment, gap = 30,
  top.labels = c("LUKE", "Sentiment", "THREEPIO"),
  main = "Sentiment Comparison", laxlab = NULL,
  raxlab = NULL, unit = NULL
)
## [1] 5.1 4.1 4.1 2.1
Master Yoda
# Word association
# Find every line of dialogue that mentions "yoda" and plot the words
# associated with it as a network.
word_associate(
  combined$dialogue,
  match.string = "yoda",
  stopwords = c(
    stopwords("english"),
    "thats", "weve", "hes", "theres", "ive", "im",
    "will", "can", "cant", "dont", "youve", "us",
    "youre", "youll", "theyre", "whats", "didnt"
  ),
  network.plot = TRUE,
  cloud.colors = c("gray85", "darkred")
)
## row group unit text ...
## 1 1097 all 1097 There you will learn from Yoda, the Jedi Master who instructed me.
## 2 1107 all 1107 Yoda...
## 3 1359 all 1359 Now all I have to do is find this Yoda...if he even exists.
## 4 1388 all 1388 Oohhh. Jedi Master. Yoda. You seek Yoda.
## 5 1437 all 1437 Look, I'm sure it's delicious. I just don't understand why we can't see Yoda now.
## 6 1439 all 1439 How far away is Yoda? Will it take us long to get there?
## 7 1440 all 1440 Not far. Yoda not far. Patience. Soon you will be with him. Rootleaf, I cook. Why wish you become Jedi? Hm?
## 8 1449 all 1449 Yoda! I am ready. I...Ben! I can be a Jedi. Ben, tell him I'm ready.
## 9 1638 all 1638 Even Yoda cannot see their fate.
## 10 1642 all 1642 But I've learned so much since then. Master Yoda, I promise to return and finish what I've begun. You have my word.
## 11 2057 all 2057 Master Yoda, you can't die.
## 12 2063 all 2063 Master Yoda... is Darth Vader my father?
## 13 2065 all 2065 Yoda, I must know.
## 14 2076 all 2076 Yoda will always be with you.
## 15 2084 all 2084 When I first knew him, your father was already a great pilot. But I was amazed how strongly the Force was with him. I took it upon myself to train him as a Jedi. I thought that I could instruct him just as well as Yoda. I was wrong. My pride has had terrible consequences for the galaxy.
## 16 2090 all 2090 Vader humbled you when first you met him, Luke...but that experience was part of your training. It taught you, among other things, the value of patience. Had you not been so impatient to defeat Vader then, you could have finished your training here with Yoda. You would have been prepared.
## 17 2097 all 2097 Yoda spoke of another.
# Add title
title(main = "Master Yoda")

# Vader’s Comment towards Rebel
# Word association
# Restrict to VADER's own lines, find those that mention "rebel", and plot
# the associated words as a network.
word_associate(
  combined$dialogue[combined$character == "VADER"],
  match.string = "rebel",
  stopwords = c(
    stopwords("english"),
    "thats", "weve", "hes", "theres", "ive", "im",
    "will", "can", "cant", "dont", "youve", "us",
    "youre", "youll", "theyre", "whats", "didnt"
  ),
  network.plot = TRUE,
  cloud.colors = c("gray85", "darkred")
)
## row group unit text
## 1 4 all 4 Don't play games with me, Your Highness. You weren't on any mercy mission this time. You passed directly through a restricted system. Several transmissions were beamed to this ship by Rebel spies. I want to know what happened to the plans they sent you.
## 2 5 all 5 You're a part of the Rebel Alliance... and a traitor. Take her away!
## 3 6 all 6 I have traced the Rebel spies to her. Now she is my only link to find their secret base!
## 4 13 all 13 And, now Your Highness, we will discuss the location of your hidden Rebel base.
## 5 17 all 17 I told you she would never consciously betray the Rebellion.
## 6 31 all 31 This will be a day long remembered. It has seen the end of Kenobi and it will soon see the end of the Rebellion.
## 7 43 all 43 That's it. The Rebels are there.
## 8 46 all 46 The Rebels are alerted to our presence. Admiral Ozzel came out of light-speed too close to the system.
## 9 108 all 108 What of the reports of the Rebel fleet massing near Sullust?
## 10 114 all 114 A small Rebel force has penetrated the shield and landed on Endor.
# Add title
# title() annotates the currently active base-graphics plot, presumably the
# network plot drawn by the word_associate() call above (confirm when running).
title(main = "Vader Rebel Comment")